# Extract the raw text lines of "Table 8" from the documentation PDF.
# Only elements 41:44 of the pdf_text() result are read (presumably one string
# per PDF page, as with pdftools::pdf_text -- TODO confirm the page range if the
# documentation file changes).
txt <- pdf_text(file.path(idir, file_documentation))[41:44]
lines <- unlist(strsplit(txt, "\n", fixed = TRUE))

# Locate the heading that opens Table 8 and the heading that opens Table 9.
# The first match of each is used; `[1]` yields NA when there is no match.
start_idx <- grep("^Table 8\\. ICD 10 Mortality Tabulation List 1", lines)[1]
end_idx <- grep(
  "^Table 9\\. ICD 10 detailed codes \\(3rd and 4th characters\\)",
  lines
)[1]

# Fail with a readable message instead of a cryptic "argument of length 0"
# (heading missing) or a silently reversed slice (headings out of order).
if (is.na(start_idx) || is.na(end_idx) || end_idx <= start_idx) {
  stop(
    "Could not locate the 'Table 8' heading followed by the 'Table 9' ",
    "heading in the extracted PDF text.",
    call. = FALSE
  )
}

# Keep only the lines strictly between the two headings. seq_len() returns an
# empty index when the headings are adjacent, whereas `(a + 1):(b - 1)` would
# count backwards and return two wrong lines.
table_lines <- lines[start_idx + seq_len(end_idx - start_idx - 1L)]
# Re-assemble table rows that were wrapped over several text lines.
# A line that starts (after optional whitespace) with a 4-digit code opens a
# new row; every other line is a continuation of the most recent row and is
# appended to it, whitespace-squished and separated by a single space.
# Lines that appear before the first coded row are discarded.
is_row_start <- grepl("^\\s*\\d{4}\\b", table_lines)
row_id <- cumsum(is_row_start) # 0 = before the first row, then 1, 2, ...
in_row <- row_id > 0L

# Grouping by row id and collapsing each group once avoids growing the result
# with c() inside a loop (quadratic copying) and yields the same strings.
fixed_lines <- unname(vapply(
  split(table_lines[in_row], row_id[in_row]),
  function(chunk) {
    # First element is the row's own line (kept verbatim); the rest are
    # continuation lines.
    paste(c(chunk[1], str_squish(chunk[-1])), collapse = " ")
  },
  character(1)
))
# Split every re-assembled row into at most three fields, using runs of two or
# more whitespace characters as the column separator, and store the fields as
# the columns Code, Code_range and Cause.
fixed_lines_trim <- str_trim(fixed_lines)
split_mat <- str_split_fixed(fixed_lines_trim, "\\s{2,}", n = 3)
df_icd10 <- data.frame(
  Code = split_mat[, 1],
  Code_range = split_mat[, 2],
  Cause = split_mat[, 3],
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
# Clean the text of every column, then drop three codes that are re-added by
# hand further down in the script.
# The replacements run in the order listed; each one is applied to all columns.
df_icd10 <- df_icd10 %>%
  mutate(across(
    everything(),
    function(col) {
      # Remove a repeated column-header line that got merged into a row
      # (presumably the table header printed again after a page break).
      col <- str_replace_all(
        col,
        "Code\\s+Detailed\\s+List\\s+Numbers\\s+Cau.*",
        ""
      )
      # Strip stray numbers trailing specific cause names (they look like
      # printed page numbers picked up from the PDF -- not verified here).
      col <- str_replace(col, "Whooping cough\\s+39\\b", "Whooping cough")
      col <- str_replace(col, "Leukaemia\\s+40\\b", "Leukaemia")
      col <- str_replace(
        col,
        "Diseases of the skin and subcutaneous tissue\\s+41\\b",
        "Diseases of the skin and subcutaneous tissue"
      )
      # Anything following "Schistosomiasis" at the start of a cell is dropped.
      col <- str_replace(col, "^Schistosomiasis\\s+.*$", "Schistosomiasis")
      # Collapse runs of whitespace, then tighten spaced hyphens ("A - B").
      col <- str_replace_all(col, "\\s{2,}", " ")
      gsub(" - ", "-", col)
    }
  )) %>%
  filter(!Code %in% c("1025", "1046", "1103"))
# Hand-written rows for codes 1025, 1046 and 1103. The script filters these
# three codes out of the parsed table and binds these rows in instead
# (presumably because their long code ranges do not parse cleanly from the
# PDF text -- confirm against the source document).
# `stringsAsFactors = FALSE` is spelled out: `F` is a reassignable variable.
manual_1025 <- data.frame(
  Code = "1025",
  Code_range = "A21-A32, A38, A42-A49, A65-A79, A81, A83-A89, B00-B04, B06-B09, B25-B49, B58-B64, B66-B94, B99",
  Cause = "Remainder of certain infectious and parasitic diseases",
  stringsAsFactors = FALSE
)

manual_1046 <- data.frame(
  Code = "1046",
  Code_range =
    "C17, C23-C24, C26-C31, C37-C41, C44-C49, C51-C52, C57-C60, C62-C66, C68-C69, C73-C81, C88, C96-C97",
  Cause = "Remainder of malignant neoplasms",
  stringsAsFactors = FALSE
)

manual_1103 <- data.frame(
  Code = "1103",
  Code_range = "W20-W64, W75-W99, X10-X39, X50-X59, Y10-Y89",
  Cause = "All other external causes",
  stringsAsFactors = FALSE
)
# Append the three hand-written rows to the parsed table, order all rows by
# the numeric value of Code, and strip leading/trailing whitespace from every
# column.
df_icd10 <- df_icd10 %>%
  bind_rows(manual_1025, manual_1046, manual_1103) %>%
  arrange(as.numeric(Code)) %>%
  mutate(across(everything(), trimws))

# Show the first ten rows as a formatted table.
kable(head(df_icd10, n = 10))